The goal of this notebook is to visualize the phylogenetic relationship of the haplotypes identified in the previous notebooks. I’ll read in the tree’s from SPRUCE as implemented by MACHINA and make a nicely formatted tree where branch length corresponds to mutations. The output of this notebook is a final filtered set of cluster relationships that can be made into a phylogenetic tree.

## ==== File paths input data ==== ##

if (exists("snakemake")) {

  # Data from lofreq and varscan labeled with subclonal haplotypes
  updated.data = snakemake@params[['incsv']]

  # Spruce 
  spruce.data = snakemake@input[[1]]
  
  # Counts of all reads bridging pairs of SNP positions
  bridging.data = snakemake@input[[2]]
  
  # Original unfiltered variant calls
  variant.data = snakemake@params[['rawvariants']]

} else {

  # Data from lofreq and varscan labeled with subclonal haplotypes
  updated.data = "../../results/variants/validated_variants.csv"

  # Spruce 
  spruce.data = "../../results/variants/spruce_output.tsv"
  
  # Counts of all reads bridging pairs of SNP positions
  bridging.data = "../../results/bridging/bridging_reads.csv"
  
  # Original unfiltered variant calls
  variant.data = "../../results/variants/variants.csv"

}
## ==== File paths output data ==== ##

if (exists("snakemake")) {
  
  # Path to save figures
  figure.dir = paste0(snakemake@params[['figures']], "/")
  
  # Path to save the polished assigned variants
  output.path = snakemake@params[['final_tree']]

} else {

  # Path to save figures
  figure.dir = "../../results/figures/"
  
  # Path to save the polished assigned variants
  output.path = "../../results/phylogeny/filtered_spruce_tree.csv"

}

# Make the figure directory if it doesn't exist
if (!file.exists(figure.dir)) {
  dir.create(figure.dir, recursive = TRUE)
  print("Directory created.")
} else {
  print("Directory already exists.")
}
## [1] "Directory already exists."

SPRUCE Trees

A set of phylogenetic relationships were fit to the data using the algorithm SPRUCE. We’ll visualize these possible trees below. Ultimately, the goal is filter these based on bridging reads.

Let’s visualize all of these trees below.

There are total of 36 trees that fit to the data. Below, I’ll use bridging reads to test the edges for these trees and filter them down to a final plausible set of trees.

Filter SPRUCE Trees

First, we already have some knowledge about whether a cluster is descended from either Genome 1 or Genome 2. Let’s filter out trees based on this information.

cluster.backgrounds = read_csv(updated.data, show_col_types = F) %>% 
  # For consistency, change the names of G1, G2, and G-1-1
  mutate(Haplotype = case_when(
    Haplotype == "genome-1" ~ "genome 01",
    Haplotype == "genome-1-1" ~ "genome 1",
    Haplotype == "genome-2" ~ "genome 2",
    T ~ Haplotype
  )) %>% 
  select(Haplotype, Background) %>% 
  distinct() %>% 
  filter(!Haplotype %in% c("both", "subclonal")) %>% 
  add_row(Haplotype = "Anc", Background = "Anc")
  

trees = unique(tree.df$tree)
trees.with.genotype.violations = c()
for (tree.name in trees) {
  
  # Get the edges from this tree
  tree.edges = tree.df %>% 
    filter(tree == tree.name) %>% 
    select(from, to)
  
  # Check if there is a violation in any of the edges
  for (i in 1:nrow(tree.edges)) {
    # Get the edge
    edge = tree.edges[i,]
    # Get the cluster names 
    from = str_replace_all(edge$from, "_", " ")
    to = str_replace_all(edge$to, "_", " ")
    # Get backgrounds 
    from.background = cluster.backgrounds %>% 
      filter(Haplotype == from) %>% 
      pull(Background)
    to.background = cluster.backgrounds %>% 
      filter(Haplotype == to) %>% 
      pull(Background)
    
    if (from == "genome 01" & to.background != "genome-1") {
      print(paste("Violation in", tree.name, " -- from:", from, "to:", to))
      trees.with.genotype.violations = c(trees.with.genotype.violations, tree.name)
    } else if (from == "genome 2" & to.background != "genome-2") {
      print(paste("Violation in", tree.name, " -- from:", from, "to:", to))
      trees.with.genotype.violations = c(trees.with.genotype.violations, tree.name)
    } else if (from.background == "genome-1" & to.background == "genome-2") {
      print(paste("Violation in", tree.name, " -- from:", from, "to:", to))
      trees.with.genotype.violations = c(trees.with.genotype.violations, tree.name)
    } else if (from.background == "genome-2" & to.background == "genome-1") {
      print(paste("Violation in", tree.name, " -- from:", from, "to:", to))
      trees.with.genotype.violations = c(trees.with.genotype.violations, tree.name)
    }
  }
}
## [1] "Violation in tree 1  -- from: genome 2 to: cluster 8"
## [1] "Violation in tree 4  -- from: genome 2 to: cluster 8"
## [1] "Violation in tree 4  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 5  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 6  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 7  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 7  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 8  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 9  -- from: genome 2 to: cluster 8"
## [1] "Violation in tree 9  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 10  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 11  -- from: genome 2 to: cluster 8"
## [1] "Violation in tree 11  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 11  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 12  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 12  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 13  -- from: genome 2 to: cluster 8"
## [1] "Violation in tree 13  -- from: genome 1 to: cluster 12"
## [1] "Violation in tree 13  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 13  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 14  -- from: genome 2 to: cluster 8"
## [1] "Violation in tree 14  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 14  -- from: cluster 2 to: cluster 12"
## [1] "Violation in tree 14  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 15  -- from: genome 1 to: cluster 12"
## [1] "Violation in tree 15  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 15  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 16  -- from: genome 1 to: cluster 12"
## [1] "Violation in tree 16  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 16  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 17  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 17  -- from: cluster 2 to: cluster 12"
## [1] "Violation in tree 17  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 18  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 18  -- from: cluster 2 to: cluster 12"
## [1] "Violation in tree 18  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 19  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 19  -- from: genome 1 to: cluster 12"
## [1] "Violation in tree 19  -- from: genome 2 to: cluster 8"
## [1] "Violation in tree 20  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 20  -- from: cluster 2 to: cluster 12"
## [1] "Violation in tree 20  -- from: genome 2 to: cluster 8"
## [1] "Violation in tree 21  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 21  -- from: genome 1 to: cluster 12"
## [1] "Violation in tree 22  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 22  -- from: genome 1 to: cluster 12"
## [1] "Violation in tree 23  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 23  -- from: cluster 2 to: cluster 12"
## [1] "Violation in tree 24  -- from: cluster 9 to: cluster 6"
## [1] "Violation in tree 24  -- from: cluster 2 to: cluster 12"
## [1] "Violation in tree 25  -- from: genome 1 to: cluster 12"
## [1] "Violation in tree 26  -- from: genome 1 to: cluster 12"
## [1] "Violation in tree 26  -- from: genome 2 to: cluster 8"
## [1] "Violation in tree 27  -- from: genome 1 to: cluster 12"
## [1] "Violation in tree 28  -- from: cluster 2 to: cluster 12"
## [1] "Violation in tree 29  -- from: cluster 2 to: cluster 12"
## [1] "Violation in tree 29  -- from: genome 2 to: cluster 8"
## [1] "Violation in tree 30  -- from: cluster 2 to: cluster 12"
## [1] "Violation in tree 31  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 31  -- from: genome 1 to: cluster 12"
## [1] "Violation in tree 31  -- from: genome 2 to: cluster 8"
## [1] "Violation in tree 32  -- from: cluster 2 to: cluster 12"
## [1] "Violation in tree 32  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 32  -- from: genome 2 to: cluster 8"
## [1] "Violation in tree 33  -- from: genome 1 to: cluster 12"
## [1] "Violation in tree 33  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 34  -- from: genome 1 to: cluster 12"
## [1] "Violation in tree 34  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 35  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 35  -- from: cluster 2 to: cluster 12"
## [1] "Violation in tree 36  -- from: genome 1 to: cluster 13"
## [1] "Violation in tree 36  -- from: cluster 2 to: cluster 12"
trees.with.genotype.violations = unique(trees.with.genotype.violations)

There are 34 of 36 total trees that violate the bridging read support for a given background (Genome 1 or Genome 2).

The remaining 2 trees are still plausible. Can we determine which of these are more likely?

Check Valid Trees

To determine if the remaining tress fit our data, I’ll see if there are cluster relationships (i.e. the tree predicts that one cluster is descended from another) that we can either confirm or disprove using our bridging reads over pairs of SNPs.

To do this, I’ll use the same approach that was used to assign clusters to either Genome 1 or Genome 2. However, in this version, I’ll be testing whether a cluster is descended from another cluster, or if they arose independently.

For this approach, we need the individual mutations and their frequency, the mean frequency of every cluster in each tissue sample, and the bridging reads.

## `summarise()` has grouped output by 'Tissue', 'Haplotype'. You can override
## using the `.groups` argument.
## Joining with `by = join_by(snp_1)`
## Warning in left_join(., dplyr::rename(haplotype.labels, snp_1 = POS, hap_1 = Haplotype)): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 12317 of `x` matches multiple rows in `y`.
## ℹ Row 430 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
## Joining with `by = join_by(snp_2)`
## Warning in left_join(., dplyr::rename(haplotype.labels, snp_2 = POS, hap_2 = Haplotype)): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 7011 of `x` matches multiple rows in `y`.
## ℹ Row 181 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

Let’s check if there are cluster relationships that we can confirm or deny by read support.

cluster.test.df = data.frame()

for (tree.name in unique(valid.trees.df$tree)) {
  
  # Get the edges from this tree
  tree.edges = valid.trees.df %>% 
    filter(tree == tree.name) %>% 
    select(from, to)
  
  # Check the evidence for nesting between clusters that are linked on the tree
  for (i in 1:nrow(tree.edges)) {
    # Get the edge
    edge = tree.edges[i,]
    
    # Get the cluster names 
    from = str_replace_all(edge$from, "_", " ")
    to = str_replace_all(edge$to, "_", " ")
    
    # Check if it's a cluster is predicted to descend from another cluster
    if(str_detect(from, "cluster") & str_detect(to, "cluster")) {
      
      # Status statement
      print(paste("Checking if", to, "is descended from", from, "for", tree.name))
      
      # Filter the bridging reads for overlaps between the clusters being tested
      snp.pairs = labled.bridging.df %>%  
        filter((hap_1 == to & hap_2 == from) | (hap_1 == from & hap_2 == to)) %>% 
        select(snp_1, snp_2) %>% 
        distinct()
      
      # Check if there are any bridging to test
      if (nrow(snp.pairs) == 0) { 
        # There aren't any bridging reads overlapping these SNPs
        linkage.information = "no information"
        print(linkage.information)
        # Write to a row
        row = tibble(snp_1 = NA, snp_2 = NA,
               hap_1 = NA, hap_2 = NA,
               from = from, to = to,
               lik_related = NA, lik_unrelated = NA,
               tree = tree.name, relationship = linkage.information)
        cluster.test.df = rbind(cluster.test.df, row)
        # Go to the next haplotypes
        next
      }

      for (i in 1:nrow(snp.pairs)) {
        
        # Get the positions of the SNPs
        snp.1 = snp.pairs[i,]$snp_1
        snp.2 = snp.pairs[i,]$snp_2
        
        # Print the SNP overlaps being checked 
        print(paste("Checking SNPs:", snp.1, "v.", snp.2))
        
        # Get the cluster names
        snp.1.cluster = haplotype.labels %>% 
          filter(POS == snp.1) %>% 
          pull()
        snp.2.cluster = haplotype.labels %>% 
          filter(POS == snp.2) %>% 
          pull()
        
        # Get the overlaps
        overlaps = labled.bridging.df %>% 
          filter(snp_1 == snp.1, snp_2 == snp.2)
        
        # Iterate over each tissue in the overlaps
        tissue.test = foreach(i = 1:nrow(overlaps), .combine = "rbind") %do% {
          
          # Get the tissue
          tissue = overlaps[i, ]$Tissue
          
          # Get the cluster frequencies
          f.anc = max(0, (haplotype.mean %>% filter(Haplotype == from, Tissue == tissue))$AF)
          f.dec = max(0, (haplotype.mean %>% filter(Haplotype == to, Tissue == tissue))$AF)
          
          # Overlaps to test in this tissue
          toTest = overlaps %>%
            filter(Tissue == tissue)
              
          # x11 refers to the number of reads overlapping both
          x11 = toTest$`11`
          if (snp.1.cluster == from) {
            # x10 is the number of reads overlapping just descendant
            x10 = toTest$`10`
            # x01 is the number of reads overlapping just ancestor
            x01 = toTest$`01`
          } else {
            # x10 is the number of reads overlapping just descendant (01 in data)
            x10 = toTest$`01`
            # x01 is the number of reads overlapping just ancestor (10 in data)
            x01 = toTest$`10`
          }
          # x00 is the number of reads overlapping neither
          x00 = toTest$`00`
          
          # There are two possibilities
          # 1: The inferred descendant does arise on the ancestor 
          # 2: The inferred descendant and ancestor arose independently 
          
          error = 0.01
          
          # Descendant and ancestor should be all of the descendant reads
          f11 = (f.dec + error)/(1 + 4*error)
          # Only the descendant should be just the error
          f10 = (0 + error)/(1 + 4*error)
          # Only the ancestor should be the frequency of the ancestor minus the descendant
          f01 = max(0 + error, (f.anc - f.dec + error))/(1 + 4*error) 
          # The remainder should be neither
          f00 = 1 - f11 - f10 - f01  
          
          # What is the probability of observing these counts given the haplotypes are nested?
          lik.nested = -dmultinom(c(x11, x10, x01, x00), prob = c(f11, f10, f01, f00), log = T) 
          
          # We should not see any reads with both
          f11 = (0 + error)/(1 + 4*error)
          # Just the ancestor 
          f01 = (f.anc + error)/(1 + 4*error)
          # Just the descendant
          f10 = (f.dec + error)/(1 + 4*error)
          # The remainder should be the subtraction
          f00 = 1 - f11 - f01 - f10
          
          # What is the probability of observing these counts given the haplotypes are not nested?
          lik.independant = -dmultinom(c(x11, x10, x01, x00), prob = c(f11, f10, f01, f00), log = T)
          
          # Results
          results = c(lik.nested, lik.independant)
        }

        AICs = 2 * apply(tissue.test, 2, sum, na.rm = TRUE)
        AIC.min = min(AICs)
        rel.related = exp((AIC.min - AICs[1])/2)
        rel.unrelated = exp((AIC.min - AICs[2])/2)
        lik.related = rel.related/(rel.related + rel.unrelated)
        lik.unrelated = rel.unrelated/(rel.related + rel.unrelated)
        
        linkage.information = c("Nesting", "Non-nesting", "neither")[c(lik.related > 0.95, lik.unrelated > 0.95,
                                                                       lik.related < 0.95 & lik.unrelated < 0.95)]
        print(linkage.information)
        # Write to a row
        row = tibble(snp_1 = snp.1, snp_2 = snp.2,
               hap_1 = snp.1.cluster, hap_2 = snp.2.cluster,
               from = from, to = to,
               lik_related = lik.related, lik_unrelated = lik.unrelated,
               tree = tree.name, relationship = linkage.information)
        
        cluster.test.df = rbind(cluster.test.df, row)

      }
      
    }

  }
  
}
## [1] "Checking if cluster 5 is descended from cluster 1 for tree 2"
## [1] "no information"
## [1] "Checking if cluster 8 is descended from cluster 2 for tree 3"
## [1] "Checking SNPs: 2343 v. 2543"
## [1] "Non-nesting"
## [1] "Checking SNPs: 2349 v. 2543"
## [1] "Non-nesting"
## [1] "Checking SNPs: 5368 v. 5405"
## [1] "Non-nesting"
## [1] "Checking if cluster 5 is descended from cluster 1 for tree 3"
## [1] "no information"

It looks like we can confidently say that cluster 8 is not descended from cluster 2. This means that can rule out all but a single single valid tree. Based on this final tree assignment, we’ve renamed the clusters below. In the renamed cluster scheme, 8 becomes cluster 6.

##   neither cluster_2 cluster_8      both 
##      3059      1928       435         0

There is only 1 tree with violations and that’s tree 3.

Finally, now that we know how the haplotypes are related to one another, we can re-name them to something that is a little more clear.

final.tree.df = final.tree.df %>% 
  mutate(to = case_when(
    to == "cluster_1" ~  "cluster 1",
    to == "cluster_2" ~  "cluster 2",
    to == "cluster_3" ~  "cluster 3",
    to == "cluster_4" ~  "cluster 4",
    to == "cluster_5" ~  "cluster 1a",
    to == "cluster_6" ~  "G-FC2",
    to == "cluster_7" ~  "cluster 5",
    to == "cluster_8" ~  "cluster 6",
    to == "cluster_9" ~  "cluster 7",
    to == "cluster_10" ~  "cluster 8",
    to == "cluster_11" ~  "cluster 9",
    to == "cluster_12" ~  "cluster 10",
    to == "cluster_13" ~  "cluster 11",
    to == "genome_01" ~  "G-01",
    to == "genome_1" ~  "G-1",
    to == "genome_2" ~  "G-2",
    TRUE ~ to)) %>% 
  mutate(from = case_when(
    from == "cluster_1" ~  "cluster 1",
    from == "cluster_2" ~  "cluster 2",
    from == "cluster_3" ~  "cluster 3",
    from == "cluster_4" ~  "cluster 4",
    from == "cluster_5" ~  "cluster 1a",
    from == "cluster_6" ~  "G-FC2",
    from == "cluster_7" ~  "cluster 5",
    from == "cluster_8" ~  "cluster 6",
    from == "cluster_9" ~  "cluster 7",
    from == "cluster_10" ~  "cluster 8",
    from == "cluster_11" ~  "cluster 9",
    from == "cluster_12" ~  "cluster 10",
    from == "cluster_13" ~  "cluster 11",
    from == "genome_01" ~  "G-01",
    from == "genome_1" ~  "G-1",
    from == "genome_2" ~  "G-2",
    TRUE ~ from))

haplotype.mean = haplotype.mean %>% 
  mutate(Haplotype = case_when(
    Haplotype == "cluster 1" ~  "cluster 1",
    Haplotype == "cluster 2" ~  "cluster 2",
    Haplotype == "cluster 3" ~  "cluster 3",
    Haplotype == "cluster 4" ~  "cluster 4",
    Haplotype == "cluster 5" ~  "cluster 1a",
    Haplotype == "cluster 6" ~  "G-FC2",
    Haplotype == "cluster 7" ~  "cluster 5",
    Haplotype == "cluster 8" ~  "cluster 6",
    Haplotype == "cluster 9" ~  "cluster 7",
    Haplotype == "cluster 10" ~  "cluster 8",
    Haplotype == "cluster 11" ~  "cluster 9",
    Haplotype == "cluster 12" ~  "cluster 10",
    Haplotype == "cluster 13" ~  "cluster 11",
    Haplotype == "genome 01" ~  "G-01",
    Haplotype == "genome 1" ~  "G-1",
    Haplotype == "genome 2" ~  "G-2",
    TRUE ~ Haplotype))

haplotype.colors =  c("#636361", "#989891", "#000000", "#22488F", "#B02B23", "#E1EAF6",  "#A6C9DF", "#7AADD2","#5691C1", "#3871B0", "#67649F", "#8484B1", "#FFFCBA", "#F9D984", "#F4B460", "#EE9B4F", "#DE4B32")
names(haplotype.colors) = c("Anc", "G-01", "G-FC2", "G-1", "G-2", "cluster 1", "cluster 1a", "cluster 2", "cluster 3", "cluster 4", "cluster 5", "cluster 6", "cluster 7", "cluster 8", "cluster 9", "cluster 10", "cluster 11")

## [1] "Writing final tree to results/phylogeny/filtered_spruce_tree.csv"

Haplotype Frequencies

Now we can confirm that cluster 5 is on the background of cluster 1. Let’s visualize this in the format of a ‘psuedo-mueller’ plot.

## `summarise()` has grouped output by 'Tissue', 'Haplotype'. You can override
## using the `.groups` argument.

Finally, we can save a table with the mutations in these haplotyes to include in the manuscript.

expanded.df %>% 
  filter(!Haplotype %in% c("subclonal", "both")) %>% 
  # Rename to the paper scheme
  mutate(Haplotype = case_when(
    Haplotype == "cluster 1" ~  "cluster 1",
    Haplotype == "cluster 2" ~  "cluster 2",
    Haplotype == "cluster 3" ~  "cluster 3",
    Haplotype == "cluster 4" ~  "cluster 4",
    Haplotype == "cluster 5" ~  "cluster 1a",
    Haplotype == "cluster 6" ~  "Genome-FC2",
    Haplotype == "cluster 7" ~  "cluster 5",
    Haplotype == "cluster 8" ~  "cluster 6",
    Haplotype == "cluster 9" ~  "cluster 7",
    Haplotype == "cluster 10" ~  "cluster 8",
    Haplotype == "cluster 11" ~  "cluster 9",
    Haplotype == "cluster 12" ~  "cluster 10",
    Haplotype == "cluster 13" ~  "cluster 11",
    Haplotype == "genome 01" ~  "Genome-01",
    Haplotype == "genome 1" ~  "Genome-1",
    Haplotype == "genome 2" ~  "Genome-2",
    TRUE ~ Haplotype)) %>% 
  # Rename the backgrounds to be consistent with the Haplotypes
  mutate(Background = case_when(
    Background == "genome-1" ~  "Genome-1",
    Background == "genome-2" ~  "Genome-2",
    TRUE ~ Background)) %>% 
  mutate(
    Reference = str_extract(SNP, "^[A-Z]"),
    Position = as.numeric(str_extract(SNP, "\\d+")),
    Variant = str_extract(SNP, "[A-Z]$")
  ) %>% 
  select(!c(DP, POS, SNP)) %>%
  rename(`Allele Frequency` = "AF") 
## # A tibble: 1,875 × 7
##    Tissue     `Allele Frequency` Background Haplotype Reference Position Variant
##    <chr>                   <dbl> <chr>      <chr>     <chr>        <dbl> <chr>  
##  1 Cerebellu…             0.0628 Genome-1   cluster 6 T              332 C      
##  2 Frontal C…             0      Genome-1   cluster 6 T              332 C      
##  3 Frontal C…             0      Genome-1   cluster 6 T              332 C      
##  4 Temporal …             0      Genome-1   cluster 6 T              332 C      
##  5 Frontal C…             0      Genome-1   cluster 6 T              332 C      
##  6 Internal …             0      Genome-1   cluster 6 T              332 C      
##  7 Upper Bra…             0.0278 Genome-1   cluster 6 T              332 C      
##  8 Parietal …             0      Genome-1   cluster 6 T              332 C      
##  9 Occipital…             0      Genome-1   cluster 6 T              332 C      
## 10 Midbrain               0.0784 Genome-1   cluster 6 T              332 C      
## # ℹ 1,865 more rows